{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Host': 'game.gtimg.cn',\n",
       " 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:107.0) Gecko/20100101 Firefox/107.0',\n",
       " 'Accept': '*/*',\n",
       " 'Accept-Language': 'en-US,en;q=0.5',\n",
       " 'Accept-Encoding': 'gzip, deflate, br',\n",
       " 'Origin': 'https://101.qq.com',\n",
       " 'Connection': 'keep-alive',\n",
       " 'Referer': 'https://101.qq.com/',\n",
       " 'Sec-Fetch-Dest': 'empty',\n",
       " 'Sec-Fetch-Mode': 'cors',\n",
       " 'Sec-Fetch-Site': 'cross-site',\n",
       " 'Pragma': 'no-cache',\n",
       " 'Cache-Control': 'no-cache',\n",
       " 'TE': 'trailers'}"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the request headers from the raw header text of this request:\n",
    "# GET /images/lol/act/img/js/heroList/hero_list.js?ts=2783292 HTTP/2\n",
    "h_txt = \"\"\"\n",
    "Host: game.gtimg.cn\n",
    "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:107.0) Gecko/20100101 Firefox/107.0\n",
    "Accept: */*\n",
    "Accept-Language: en-US,en;q=0.5\n",
    "Accept-Encoding: gzip, deflate, br\n",
    "Origin: https://101.qq.com\n",
    "Connection: keep-alive\n",
    "Referer: https://101.qq.com/\n",
    "Sec-Fetch-Dest: empty\n",
    "Sec-Fetch-Mode: cors\n",
    "Sec-Fetch-Site: cross-site\n",
    "Pragma: no-cache\n",
    "Cache-Control: no-cache\n",
    "TE: trailers\n",
    "\"\"\"\n",
    "\n",
    "\n",
    "def parse_headers(raw):\n",
    "    \"\"\"Convert a block of 'Name: value' lines into a dict.\n",
    "\n",
    "    Each line is split on its first ': ' only, so a value that itself\n",
    "    contains ': ' is kept whole. Lines without that separator (for\n",
    "    example the empty lines at both ends of the text) are skipped.\n",
    "    \"\"\"\n",
    "    parsed = {}\n",
    "    for line in raw.splitlines():\n",
    "        key, sep, value = line.partition(': ')\n",
    "        if not sep:\n",
    "            continue\n",
    "        parsed[key] = value\n",
    "    return parsed\n",
    "\n",
    "\n",
    "header = parse_headers(h_txt)\n",
    "header\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
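    "# Install the required packages (uncomment and run once per environment).\n",
    "# Note: besides requests and lxml, this notebook also imports jsonpath and selenium.\n",
    "# ! conda install requests lxml\n"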
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Response [200]>"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# requests performs the HTTP call\n",
    "import requests\n",
    "\n",
    "# URL of the hero-list JSON to scrape\n",
    "url = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js?ts=2783292'\n",
    "\n",
    "# Send a GET request with the headers collected in `header`.\n",
    "# The timeout keeps the cell from hanging indefinitely if the server never\n",
    "# answers, and raise_for_status() turns a 4xx/5xx reply into an exception\n",
    "# instead of letting later cells try to parse an error body.\n",
    "resp = requests.get(url=url, headers=header, timeout=10)\n",
    "resp.raise_for_status()\n",
    "resp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "\n",
    "# URL of the video to download\n",
    "video_url = 'https://vdn6.vzuu.com/HD/e96189f8-7abe-11ed-a7fc-124ed1fb05ee-v8_f2_t1_n5PyZZkc.mp4?pkey=AAUOLHfDKmrA4tGSsWf3JD75wqFnvYeumejn3r7pvO4CW7GFDo6Car7wvFngXL2sJC9iPrhqLWNyYmiHj3OYN9xs&c=avc.8.0&f=mp4&pu=1513c7c2&bu=http-1513c7c2&expiration=1672668732&v=ks6&pf=Web&pt=zhihu'\n",
    "\n",
    "# Dedicated names (video_url / video_resp) keep this download from\n",
    "# overwriting the `url` / `resp` variables used by the hero-list cells.\n",
    "# stream=True plus iter_content() writes the file chunk by chunk instead of\n",
    "# holding the whole video in memory, and raise_for_status() prevents an HTTP\n",
    "# error body from being saved as a.mp4.\n",
    "with requests.get(url=video_url, stream=True, timeout=30) as video_resp:\n",
    "    video_resp.raise_for_status()\n",
    "    with open('a.mp4', 'wb') as f:\n",
    "        for chunk in video_resp.iter_content(chunk_size=1 << 20):\n",
    "            f.write(chunk)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_46167/1045208517.py:2: DeprecationWarning: invalid escape sequence '\\/'\n",
      "  json_str=resp.content.decode('unicode_escape')\n"
     ]
    }
   ],
   "source": [
    "# Decode the response body as UTF-8 text. Any \\uXXXX escapes inside the\n",
    "# JSON are resolved by json.loads() when the text is parsed, so they need\n",
    "# no special handling here. Decoding with 'unicode_escape' instead raises a\n",
    "# DeprecationWarning on the JSON escape '\\/' and would corrupt escaped\n",
    "# quotes as well as any non-ASCII bytes in the body.\n",
    "json_str = resp.content.decode('utf-8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'heroId': '1',\n",
       " 'name': '黑暗之女',\n",
       " 'alias': 'Annie',\n",
       " 'title': '安妮',\n",
       " 'roles': ['mage'],\n",
       " 'isWeekFree': '0',\n",
       " 'attack': '2',\n",
       " 'defense': '3',\n",
       " 'magic': '10',\n",
       " 'difficulty': '6',\n",
       " 'selectAudio': 'https://game.gtimg.cn/images/lol/act/img/vo/choose/1.ogg',\n",
       " 'banAudio': 'https://game.gtimg.cn/images/lol/act/img/vo/ban/1.ogg',\n",
       " 'isARAMweekfree': '0',\n",
       " 'ispermanentweekfree': '0',\n",
       " 'changeLabel': '无改动',\n",
       " 'goldPrice': '4800',\n",
       " 'couponPrice': '2000',\n",
       " 'camp': '',\n",
       " 'campId': '',\n",
       " 'keywords': '安妮,黑暗之女,火女,Annie,anni,heianzhinv,huonv,an,hazn,hn',\n",
       " 'instance_id': '0b95894e-0df2-470e-b282-6c5f5cf41955'}"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import json\n",
    "\n",
    "from jsonpath import jsonpath\n",
    "\n",
    "# Parse the downloaded text into Python objects.\n",
    "json_dict = json.loads(json_str)\n",
    "\n",
    "# Query the parsed document with the JSONPath expression '$..hero'.\n",
    "ret = jsonpath(json_dict, '$..hero')\n",
    "\n",
    "# Show the first item of the first match.\n",
    "ret[0][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Take the value stored under the 'hero' key and check what type it is.\n",
    "hero = json_dict['hero']\n",
    "type(hero)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'<!DOCTYPE html>\\r\\n<html>\\r\\n\\r\\n<head>\\r\\n  <meta charset=\"gbk\">\\r\\n  <meta http-equiv=\"X-UA-Compatible\" content=\"ie=edge,chrome=1\">\\r\\n  <meta name=\"tgtest-ignore\" content=\"all\">\\r\\n  <!--<meta name=\"viewport\" id=\"viewport\" content=\"width=device-width, user-scalable=no, viewport-fit=cover\">-->\\r\\n  <meta name=\"viewport\" content=\"width=device-width,initial-scale=1,minimum-scale=1,maximum-scale=1,user-scalable=no\">\\r\\n  <meta name=\"format-detection\" content=\"telephone=no\">\\r\\n  <meta name=\"apple-mobile-web-app-capable\" content=\"yes\">\\r\\n  <meta name=\"format-detection\" content=\"telephone=no\" />\\r\\n  <link rel=\"icon\" href=\"https://game.gtimg.cn/images/lol/lolstrategy/game.ico\" type=\"image/x-icon\">\\r\\n  <link rel=\"shortcut icon\" href=\"https://game.gtimg.cn/images/lol/lolstrategy/game.ico\" type=\"image/x-icon\">\\r\\n  <meta name=\"keywords\" content=\"Ó¢ÐÛÁªÃË,lol,league of legends,lol¹¥ÂÔ,lolÊÓÆµ,lolÊÓÆµ¹¥ÂÔ,lolÓ¢ÐÛ×ÊÁÏ,lolÕ½ÕùÑ§Ôº,Ð¡²Ô½âËµÊÓÆµ,101Õ½ÕùÑ§Ôº,Ó¢ÐÛ\" />\\r\\n  <meta name=\"description\" content=\"Ó¢ÐÛÁªÃË101Õ½ÕùÑ§Ôº£¬ÍÆ¼ö¹¥ÂÔ£¬ÊÓÆµ¹¥ÂÔ£¬Ð¡²Ô½âËµÊÓÆµ\" />\\r\\n  <title>¹¥ÂÔÖÐÐÄ-Ó¢ÐÛÁªÃË¹Ù·½ÍøÕ¾-ÌÚÑ¶ÓÎÏ·</title>\\r\\n\\r\\n  <style>\\r\\n      html,body{\\r\\n          filter: grayscale(100%)!important;\\r\\n          -webkit-filter: grayscale(95%)!important;\\r\\n          -moz-filter: grayscale(100%)!important;\\r\\n          -ms-filter: grayscale(100%)!important;\\r\\n          -o-filter: grayscale(100%)!important;\\r\\n          filter: progid:DXImageTransform.Microsoft.BasicImage(grayscale=1);\\r\\n          -webkit-filter: grayscale(1)!important;\\r\\n      }\\r\\n      #afooter{\\r\\n          display: none!important;\\r\\n      }\\r\\n  </style>\\r\\n  <script>\\r\\n    //ÆÁÄ»ÊÊÓ¦\\r\\n    // (function (win, doc) {\\r\\n    //   if (!win.addEventListener) return;\\r\\n    //\\r\\n    //   function setFont() {\\r\\n    //     var html = document.documentElement;\\r\\n    // 
    var k = 2560;\\r\\n    //     if (html.clientWidth / k * 100 >= 100) html.style.fontSize = \\'100px\\';\\r\\n    //     else if (html.clientWidth / k * 100 <= 64) html.style.fontSize = \\'64px\\';\\r\\n    //     else html.style.fontSize = html.clientWidth / k * 100 + \\'px\\';\\r\\n    //     // html.style.fontSize = html.clientWidth / k * 100 + \\'px\\';\\r\\n    //   }\\r\\n    //\\r\\n    //   setFont();\\r\\n    //   setTimeout(function () {\\r\\n    //     setFont();\\r\\n    //   }, 300);\\r\\n    //   doc.addEventListener(\\'DOMContentLoaded\\', setFont, false);\\r\\n    //   win.addEventListener(\\'resize\\', setFont, false);\\r\\n    //   win.addEventListener(\\'load\\', setFont, false);\\r\\n    // })(window, document);\\r\\n    //tft\\r\\n    (function (win, doc) {\\r\\n      if (!win.addEventListener) return;\\r\\n\\r\\n      function setFont() {\\r\\n        var html = document.documentElement;\\r\\n        var k = 1920;\\r\\n        if (html.clientWidth / k * 100 >= 100) html.style.fontSize = \\'100px\\';\\r\\n        else if (html.clientWidth / k * 100 <= 64) html.style.fontSize = \\'64px\\';\\r\\n        else html.style.fontSize = html.clientWidth / k * 100 + \\'px\\';\\r\\n      }\\r\\n\\r\\n      setFont();\\r\\n      setTimeout(function () {\\r\\n        setFont();\\r\\n      }, 300);\\r\\n      doc.addEventListener(\\'DOMContentLoaded\\', setFont, false);\\r\\n      win.addEventListener(\\'resize\\', setFont, false);\\r\\n      win.addEventListener(\\'load\\', setFont, false);\\r\\n    })(window, document);\\r\\n  </script>\\r\\n  <style>\\r\\n    body {background-color: #111d29;color: #ffffff;font-size: 20px!important;}\\r\\n    .slide-fade-enter-active ,\\r\\n    .slideUp-fade-enter-active {\\r\\n      transition: all .8s ease;\\r\\n    }\\r\\n    .slide-fade-leave-active,\\r\\n    .slideUp-fade-leave-active {\\r\\n      transition: all .3s cubic-bezier(1.0, 0.5, 0.8, 1.0);\\r\\n    }\\r\\n    .slide-fade-enter,\\r\\n    
.slide-fade-leave-to\\r\\n      /* .slide-fade-leave-active for below version 2.1.8 */ {\\r\\n      transform: translateX(10px);\\r\\n      opacity: 0;\\r\\n    }\\r\\n    .slideUp-fade-enter,\\r\\n    .slideUp-fade-leave-to\\r\\n      /* .slide-fade-leave-active for below version 2.1.8 */ {\\r\\n      transform: translateY(10px);\\r\\n      opacity: 0;\\r\\n    }\\r\\n\\r\\n  </style>\\r\\n  <link rel=\"stylesheet\" href=\"css/swiper-bundle.css\">\\r\\n  <link rel=\"stylesheet\" href=\"css/comm.css?20210528\">\\r\\n  <link rel=\"stylesheet\" href=\"css/rank.css?20211027\">\\r\\n</head>\\r\\n<body>\\r\\n  <div class=\"app-wrapper\" id=\"app\">\\r\\n    <transition name=\"slide-fade\" mode=\"out-in\">\\r\\n      <keep-alive>\\r\\n        <router-view></router-view>\\r\\n      </keep-alive>\\r\\n    </transition>\\r\\n  </div>\\r\\n\\r\\n\\r\\n  <script src=\"//game.gtimg.cn/images/js/2018foot/foot.js\"></script>\\r\\n  <script src=\"//js.aq.qq.com/js/aq_common.js\"></script>\\r\\n  <script src=\"//ossweb-img.qq.com/images/js/milo_bundle/milo.js\" charset=\"gbk\"></script>\\r\\n  <script src=\"//game.gtimg.cn/images/js/PTT/ping_tcss_tgideas_https_min.js\"></script>\\r\\n  <script src=\"js/lib/util.charset.js\"></script>\\r\\n  <script src=\"js/lib/vendor.js\" charset=\"utf-8\"></script>\\r\\n  <script src=\"js/swiper-bundle.min.js\"></script>\\r\\n  <script src=\"js/echarts.min.js\" charset=\"utf-8\"></script>\\r\\n  <script src=\"js/styledark.js\" charset=\"utf-8\"></script>\\r\\n  <script src=\"js/base.js?202103191430\"></script>\\r\\n  <script src=\"js/api.js?20210528\"></script>\\r\\n  <script src=\"js/store.js?202103191430\"></script>\\r\\n  <script src=\"js/app.js?20210528\"></script>\\r\\n  <script>\\r\\n\\r\\n    var setSite = {\\r\\n      siteType: \"101\",\\r\\n      pageType: \"main\",\\r\\n      pageName: \"¹¥ÂÔÖÐÐÄ\",\\r\\n      project: \"base\",\\r\\n      osact: \"pc\"\\r\\n    };\\r\\n    if (typeof(pgvMain) == \\'function\\') pgvMain();\\r\\n  
</script>\\r\\n</body>\\r\\n\\r\\n</html>\\r\\n'"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Images: fetch the hero page\n",
    "import requests\n",
    "\n",
    "# Dedicated names (page_url / page_resp) keep this cell from overwriting\n",
    "# the `url` / `resp` variables that belong to the hero-list request.\n",
    "page_url = 'https://101.qq.com/#/hero'\n",
    "\n",
    "page_resp = requests.get(page_url, timeout=10)\n",
    "page_resp.raise_for_status()\n",
    "\n",
    "# The page declares charset gbk in its meta tag, so decode the bytes as GBK.\n",
    "# Decoding with 'unicode_escape' turned the Chinese text into mojibake.\n",
    "html_str = page_resp.content.decode('gbk', errors='replace')\n",
    "\n",
    "# Preview only the beginning of the document instead of dumping all of it.\n",
    "html_str[:500]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Element html at 0x7fa9785d8640>"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from lxml import etree\n",
    "\n",
    "# Parse the downloaded markup so it can be queried with XPath.\n",
    "html = etree.HTML(html_str)\n",
    "html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[]"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look up the image of the first list item in the statically downloaded HTML.\n",
    "# NOTE(review): the saved run returned an empty list here, presumably because\n",
    "# the list is rendered client-side -- confirm.\n",
    "html.xpath(\n",
    "    '/html/body/div[1]/div/div[3]/div/div/ul/li[1]/div/div/img'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import time\n",
    "\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "from selenium.webdriver.support.ui import WebDriverWait\n",
    "\n",
    "url = 'https://101.qq.com/#/hero'\n",
    "\n",
    "# Location of the geckodriver binary. Set the GECKODRIVER_PATH environment\n",
    "# variable to override the machine-specific default below.\n",
    "path = os.environ.get(\n",
    "    'GECKODRIVER_PATH',\n",
    "    '/home/quanwei/workspaceFolder/mxl-proj/python-spyder/geckodriver',\n",
    ")\n",
    "\n",
    "# Open the browser. Recent Selenium releases take the driver path through a\n",
    "# Service object; older ones only accept executable_path, so fall back to it.\n",
    "try:\n",
    "    from selenium.webdriver.firefox.service import Service\n",
    "    browser = webdriver.Firefox(service=Service(executable_path=path))\n",
    "except (ImportError, TypeError):\n",
    "    browser = webdriver.Firefox(executable_path=path)\n",
    "\n",
    "# Load the page\n",
    "browser.get(url)\n",
    "\n",
    "# Wait (up to 30 s) until the hero list and at least one image inside it\n",
    "# exist, instead of always sleeping for a fixed 10 s.\n",
    "# find_element(By.XPATH, ...) style locators replace find_element_by_xpath,\n",
    "# which Selenium 4.3+ no longer provides.\n",
    "ul_xpath = '/html/body/div[1]/div/div[3]/div/div/ul'\n",
    "wait = WebDriverWait(browser, 30)\n",
    "ul = wait.until(EC.presence_of_element_located((By.XPATH, ul_xpath)))\n",
    "wait.until(EC.presence_of_element_located((By.XPATH, ul_xpath + '//img')))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'https://game.gtimg.cn/images/lol/act/img/skinloading/2000.jpg'"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from selenium.webdriver.common.by import By\n",
    "\n",
    "# Collect every img element below the hero list.\n",
    "# find_elements(By.XPATH, ...) replaces find_elements_by_xpath, which\n",
    "# Selenium 4.3+ no longer provides.\n",
    "img = ul.find_elements(By.XPATH, './/img')\n",
    "img[1].get_attribute('src')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'纳祖芒荣耀'"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from selenium.webdriver.common.by import By\n",
    "\n",
    "# Collect the elements carrying the 'hero-name' class.\n",
    "# find_elements(By.CLASS_NAME, ...) replaces find_elements_by_class_name,\n",
    "# which Selenium 4.3+ no longer provides.\n",
    "name = ul.find_elements(By.CLASS_NAME, 'hero-name')\n",
    "name[161].text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "162"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of img elements collected from the hero list.\n",
    "len(img)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "162"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of hero records taken from the JSON.\n",
    "len(hero)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'heroId': '1',\n",
       " 'name': '黑暗之女',\n",
       " 'alias': 'Annie',\n",
       " 'title': '安妮',\n",
       " 'roles': ['mage'],\n",
       " 'isWeekFree': '0',\n",
       " 'attack': '2',\n",
       " 'defense': '3',\n",
       " 'magic': '10',\n",
       " 'difficulty': '6',\n",
       " 'selectAudio': 'https://game.gtimg.cn/images/lol/act/img/vo/choose/1.ogg',\n",
       " 'banAudio': 'https://game.gtimg.cn/images/lol/act/img/vo/ban/1.ogg',\n",
       " 'isARAMweekfree': '0',\n",
       " 'ispermanentweekfree': '0',\n",
       " 'changeLabel': '无改动',\n",
       " 'goldPrice': '4800',\n",
       " 'couponPrice': '2000',\n",
       " 'camp': '',\n",
       " 'campId': '',\n",
       " 'keywords': '安妮,黑暗之女,火女,Annie,anni,heianzhinv,huonv,an,hazn,hn',\n",
       " 'instance_id': '0b95894e-0df2-470e-b282-6c5f5cf41955'}"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the first hero record.\n",
    "hero[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'heroId': '1',\n",
       " 'name': '黑暗之女',\n",
       " 'alias': 'Annie',\n",
       " 'title': '安妮',\n",
       " 'roles': ['mage'],\n",
       " 'isWeekFree': '0',\n",
       " 'attack': '2',\n",
       " 'defense': '3',\n",
       " 'magic': '10',\n",
       " 'difficulty': '6',\n",
       " 'selectAudio': 'https://game.gtimg.cn/images/lol/act/img/vo/choose/1.ogg',\n",
       " 'banAudio': 'https://game.gtimg.cn/images/lol/act/img/vo/ban/1.ogg',\n",
       " 'isARAMweekfree': '0',\n",
       " 'ispermanentweekfree': '0',\n",
       " 'changeLabel': '无改动',\n",
       " 'goldPrice': '4800',\n",
       " 'couponPrice': '2000',\n",
       " 'camp': '',\n",
       " 'campId': '',\n",
       " 'keywords': '安妮,黑暗之女,火女,Annie,anni,heianzhinv,huonv,an,hazn,hn',\n",
       " 'instance_id': '0b95894e-0df2-470e-b282-6c5f5cf41955',\n",
       " 'imgUrl': 'https://game.gtimg.cn/images/lol/act/img/skinloading/1000.jpg'}"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Add the image URL to every hero record.\n",
    "# Heroes and images are paired purely by position, so check up front that\n",
    "# both lists have the same length instead of failing part-way through the\n",
    "# loop (fewer images) or silently ignoring surplus images.\n",
    "if len(img) != len(hero):\n",
    "    raise ValueError(\n",
    "        f'expected {len(hero)} images but found {len(img)}'\n",
    "    )\n",
    "for hero_entry, img_element in zip(hero, img):\n",
    "    hero_entry[\"imgUrl\"] = img_element.get_attribute('src')\n",
    "hero[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Serialise the hero list to a JSON string.\n",
    "# ensure_ascii=False keeps non-ASCII characters as they are instead of\n",
    "# writing them as \\uXXXX escapes.\n",
    "json_obj = dict(hero=hero)\n",
    "ret_str = json.dumps(json_obj, ensure_ascii=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# ret_str is built with ensure_ascii=False and so contains non-ASCII text;\n",
    "# write it with an explicit UTF-8 encoding instead of the platform default.\n",
    "with open(\"../share-file/lol_hero.json\", 'w', encoding='utf-8') as f:\n",
    "    f.write(ret_str)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.12 ('base')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12 (main, Apr  5 2022, 06:56:58) \n[GCC 7.5.0]"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "4c2b8563932708ac4f64e3bc4797ea8eb43fc7530baa7f01daadf5b46d7b1fd4"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
